---
title: "descriptive statistics and plots"
author: "Kota SUECHIKA"
date: "`r format(Sys.time(), '%Y-%m-%d')`"
output:
  html_document: default
  pdf_document: default
---

```{r setup, include=FALSE}
# Chunk defaults for the whole document: show the code and use a wide canvas.
knitr::opts_chunk$set(echo = TRUE, fig.width = 10, fig.height = 5)

# The workspace is deliberately NOT cleared with rm(list = ls()): when the
# document is rendered from a console session that call would delete the
# caller's objects, and it does not give a clean session anyway (attached
# packages and options survive it).

# Every dependency is attached with library() so that a missing package stops
# the render right here. require() would only return FALSE with a warning and
# the failure would surface later as "could not find function ...".
# The attach order is unchanged because later packages mask earlier ones.
library(dplyr)
library(tidyverse)
library(ggplot2)
library(memisc)
library(stargazer)
library(psych)
library(gt)
library(summarytools)
library(fastDummies)
library(stringi)
library(ggpubr)

# Read the raw survey export, then discard its first data row.
# NOTE(review): presumably that row is a second header line (e.g. question
# text) and not a respondent -- confirm against the CSV.
dat_lebanon <- read_csv("data/Lebanon.csv")
dat_lebanon <- dat_lebanon[-1, ]

# RecordedDate: keep only the YYYY-MM-DD part of the recorded value and store
# it as a Date.
# ceasefire: "Pre" for records dated up to and including 2025-01-16, "Post"
# for records dated 2025-01-17 or later; records without a date stay NA.
dat_lebanon <- dat_lebanon |>
  mutate(
    RecordedDate = as.Date(
      stri_extract_first_regex(RecordedDate, "\\d{4}-\\d{2}-\\d{2}")
    ),
    ceasefire = factor(
      if_else(RecordedDate <= as.Date("2025-01-16"), "Pre", "Post")
    )
  )

# Lookup from the numeric answer code of item D6 to a sect label. Codes that
# are not listed here (e.g. 11 and 12) are mapped to NA.
sect_labels <- c(
  "1"  = "Sunni",
  "2"  = "Shia",
  "3"  = "Maronites",
  "4"  = "Druze",
  "5"  = "Ismail",
  "6"  = "Armenian",
  "7"  = "Greek",
  "8"  = "Protestant",
  "9"  = "Minorities",
  "10" = "Multi-sectarian",
  "13" = "Secular"
)

# Recode the demographic items and the attitude item Q2_2.
# Every item is converted with as.numeric() before it is compared: if a column
# was read as character, `Q2_2 >= 4` would be a lexical comparison (for
# example "10" >= "4" is FALSE). D1, D2, D3 and D5 were already converted this
# way; D6 and Q2_2 now follow the same rule.
dat_lebanon <- dat_lebanon |>
  mutate(
    Gender = as.numeric(D1),
    Education = as.numeric(D3),
    Income = as.numeric(D5),
    Age_group = as.numeric(D2),
    # as.character(as.numeric(.)) normalises the code (e.g. "1", 1) to the
    # lookup key; unname() drops the names the lookup would otherwise carry.
    Sect = unname(sect_labels[as.character(as.numeric(D6))]),
    # Q2_2: 4 or more -> "Like", exactly 3 -> "Neither", 2 or less -> "Dislike";
    # anything else (including missing) stays NA.
    Iran = case_when(
      as.numeric(Q2_2) >= 4 ~ "Like",
      as.numeric(Q2_2) == 3 ~ "Neither",
      as.numeric(Q2_2) <= 2 ~ "Dislike"
    )
  )
      
# Shia party supporter
# Shia_party_score is the mean of items Q4_2 and Q4_3; respondents scoring 7
# or more are labelled "Shia-party supporter", everyone else "Others", and a
# missing score stays NA.
# NOTE(review): presumably Q4_2 and Q4_3 are the ratings of the two Shia
# parties -- confirm against the questionnaire.
# The items are converted with as.numeric() first, like the D-items elsewhere
# in this file, because `+` fails on columns that were read as character.
dat_lebanon <- dat_lebanon |>
  mutate(
    Shia_party_score = (as.numeric(Q4_2) + as.numeric(Q4_3)) / 2,
    Shia_party_supporter = factor(
      if_else(Shia_party_score >= 7, "Shia-party supporter", "Others")
    )
  )

# Patronage
# Item Q4 is split into "High" (3 or more) and "Low" (2 or less); any other
# value, including a missing answer, stays NA.
# Q4 is converted with as.numeric() first so that the comparison is numeric
# even if the column was read as character.
dat_lebanon <- dat_lebanon |>
  mutate(
    Patronage = factor(case_when(
      as.numeric(Q4) >= 3 ~ "High",
      as.numeric(Q4) <= 2 ~ "Low"
    ))
  )

# High patronage × Shia party supporter
# "Patron_Shia" marks respondents who are both a Shia-party supporter and in
# the "High" patronage group; every other respondent is "Other".
# NOTE(review): respondents with a missing supporter or patronage value also
# end up in "Other" -- confirm that this is intended.
dat_lebanon <- dat_lebanon |>
  mutate(
    Patron_Shia = factor(if_else(
      Shia_party_supporter == "Shia-party supporter" & Patronage == "High",
      "Patron_Shia",
      "Other",
      missing = "Other"
    ))
  )
        
```

# gender
```{r}
# Respondent counts by gender.
# NOTE(review): the counts are typed in by hand, not derived from
# dat_lebanon -- confirm they still match the data.
gender_dat_lb <- data.frame(
  Gender = c("Female", "Male"),
  n = c(439, 754)
)

# Slice geometry for the pie chart. Rows are sorted by descending factor level
# (Female, then Male) before the counts are accumulated, so each slice gets
# its start (ymin), end (ymax) and midpoint (ymid) on the count axis.
gender_dat_lb <- gender_dat_lb |>
  mutate(Gender = factor(Gender, levels = c("Male", "Female"))) |>
  arrange(desc(Gender)) |>
  mutate(
    per = n / sum(n),
    label = paste0(Gender, "\n", scales::percent(per, accuracy = 0.1)),
    ymax = cumsum(n),
    ymin = ymax - n,
    ymid = (ymin + ymax) / 2
  )

# Pie chart: one stacked column bent into a circle, with "<gender>\n<share>"
# written at the midpoint of each slice. No legend, no margins.
p_gender <- ggplot(gender_dat_lb, aes(x = 1, y = n, fill = Gender)) +
  geom_col(width = 1, color = "white") +
  geom_text(
    aes(y = ymid, label = label),
    size = 6.5,
    fontface = "bold"
  ) +
  coord_polar(theta = "y", clip = "off") +
  labs(title = "") +
  theme_void() +
  theme(legend.position = "none", plot.margin = margin(0, 0, 0, 0))

# Export as a 6 x 6 inch PNG at 600 dpi on a white background.
ggsave(
  filename = "result/gender_proportion.png",
  plot = p_gender,
  width = 6, height = 6, units = "in",
  dpi = 600,
  bg = "white"
)
```

# age
```{r}
# Respondent counts by age group.
# NOTE(review): the counts are typed in by hand, not derived from
# dat_lebanon -- confirm they still match the data.
age_dat_lb <- data.frame(
  Age_group = c("18-20", "21-30", "31-40", "41-50", "51-60", "60-"),
  n = c(134, 447, 349, 171, 74, 10)
)

# Label table: share of each group plus the start (ymin), end (ymax) and
# midpoint (ymid) of its slice, accumulated in descending Age_group order.
lab_df <- age_dat_lb |>
  arrange(desc(Age_group)) |>
  mutate(
    per = n / sum(n),
    label = paste0(Age_group, "\n", scales::percent(per, accuracy = 0.1)),
    ymax = cumsum(n),
    ymin = ymax - n,
    ymid = (ymin + ymax) / 2
  )

# Slices whose share is below `thresh` get their label outside the pie.
# With these counts only "60-" falls below 5%, so "51-60" stays inside.
thresh <- 0.05

# Label position along the count axis: the slice midpoint in both cases.
outside <- lab_df |>
  filter(per < thresh) |>
  arrange(ymid) |>
  mutate(ypos = ymid)
inside <- lab_df |>
  filter(per >= thresh) |>
  mutate(ypos = ymid)

p2 <- ggplot(age_dat_lb, aes(x = 1, y = n, fill = Age_group)) +
  geom_col(width = 1, color = "white") +
  coord_polar(theta = "y", clip = "off") +
  # Inside labels (includes "51-60")
  geom_text(
    data = inside,
    mapping = aes(x = 1.00, y = ypos, label = label),
    inherit.aes = FALSE,
    size = 5.0,
    fontface = "bold"
  ) +
  # Outside labels (only "60-")
  geom_text(
    data = outside,
    mapping = aes(x = 1.65, y = ypos, label = label),
    inherit.aes = FALSE,
    size = 5.0,
    fontface = "bold"
  ) +
  # Leader lines from the slice towards its outside label
  geom_segment(
    data = outside,
    mapping = aes(x = 1.25, xend = 1.52, y = ymid, yend = ypos),
    inherit.aes = FALSE,
    color = "black",
    linewidth = 0.5
  ) +
  labs(title = "") +
  theme_void() +
  theme(legend.position = "none", plot.margin = margin(0, 0, 0, 0))

# Export as a 6 x 6 inch PNG at 600 dpi on a white background.
ggsave(
  filename = "result/age_proportion.png",
  plot = p2,
  width = 6, height = 6, units = "in",
  dpi = 600,
  bg = "white"
)

```

# sect
```{r}
# Bar chart of the number of respondents per sect, one colour per sect.
# Axis titles are set to size 16 and tick labels to size 14; the x tick labels
# are rotated 45 degrees and right-aligned.
p3 <- ggplot(dat_lebanon, aes(x = Sect, fill = Sect)) +
  geom_bar() +
  labs(title = "", x = "Sect", y = "Respondents") +
  theme(
    axis.title = element_text(size = 16),
    axis.text.x = element_text(angle = 45, hjust = 1, size = 14),
    axis.text.y = element_text(size = 14)
  )

# Show the plot in the rendered document.
p3

# Export as a 15 x 7 inch PNG at 600 dpi on a white background.
ggsave(
  filename = "result/sectarian_proportion.png",
  plot = p3,
  width = 15, height = 7, units = "in",
  dpi = 600,
  bg = "white"
)
```

# descriptive statistics
```{r}

# 0/1 indicator columns for the descriptive table. as.numeric() on a logical
# gives 1 for TRUE, 0 for FALSE and NA where the source value is missing.
dat_lebanon <- dat_lebanon |>
  mutate(
    # Gender code 1 is treated as male, code 2 as female.
    Male = as.numeric(Gender == 1),
    Female = as.numeric(Gender == 2),
    # One indicator per sect label.
    Sunni = as.numeric(Sect == "Sunni"),
    Shia = as.numeric(Sect == "Shia"),
    Maronites = as.numeric(Sect == "Maronites"),
    Druze = as.numeric(Sect == "Druze"),
    Ismail = as.numeric(Sect == "Ismail"),
    Armenian = as.numeric(Sect == "Armenian"),
    Greek = as.numeric(Sect == "Greek"),
    Protestant = as.numeric(Sect == "Protestant"),
    Minorities = as.numeric(Sect == "Minorities"),
    Multi_sectarian = as.numeric(Sect == "Multi-sectarian"),
    Secular = as.numeric(Sect == "Secular"),
    # Survey period relative to the ceasefire cut-off.
    ceasefire_pre = as.numeric(ceasefire == "Pre"),
    ceasefire_post = as.numeric(ceasefire == "Post"),
    # High-patronage Shia-party supporters versus everyone else.
    Patron_Shia_high = as.numeric(Patron_Shia == "Patron_Shia"),
    Patron_Shia_low = as.numeric(Patron_Shia == "Other"),
    # Shia-party supporters versus the rest.
    Shia_party = as.numeric(Shia_party_supporter == "Shia-party supporter"),
    Others = as.numeric(Shia_party_supporter == "Others")
  )

# Variables that go into the descriptive table.
# NOTE(review): `Age` is not created anywhere in the visible code (only
# `Age_group` is) -- presumably it is a column of the raw CSV; confirm.
desc_stat_lb <- dat_lebanon |>
  dplyr::select(
    ceasefire_pre, ceasefire_post, Patron_Shia_high, Patron_Shia_low,
    Sunni, Shia, Maronites, Druze, Ismail, Armenian,
    Greek, Protestant, Minorities, Multi_sectarian, Secular,
    Gender, Male, Female, Age, Education, Income, Shia_party, Others
  )

# Quick checks printed into the rendered document.
str(desc_stat_lb)
summary(desc_stat_lb)
stargazer(as.data.frame(desc_stat_lb), type = "text", summary = TRUE)

# The summary table is computed once and reused for both outputs below; the
# same descr() call used to be evaluated twice with identical arguments.
df_desc_stat_lb <- summarytools::descr(
  desc_stat_lb,
  stats = c("n.valid", "mean", "sd", "min", "max"),
  transpose = TRUE,
  order = "p"
)

# Export the table as HTML.
memisc::write_html(df_desc_stat_lb, file = "result/descriptive_statistics.html")

# Render the table in the document with three decimals on columns 3 and 4.
# NOTE(review): the columns are addressed by position -- presumably these are
# the mean and the standard deviation; confirm.
gt(df_desc_stat_lb, rownames_to_stub = TRUE) |>
  fmt_number(columns = 3:4, decimals = 3)

```

